**Due by midnight on Saturday October 23 2021**. Answer all of the following problems. These problems should be completed in this notebook (using the R kernel). Computational questions may require code, plots, analysis, interpretation, etc. Working in small groups is allowed, but it is important that you make an effort to master the material and hand in your own work.
Load the \({\tt prostate}\) data into R using the faraway package. The prostate data frame has 97 rows and 9 columns. It comes from a study on 97 men with prostate cancer who were due to receive a radical prostatectomy.
library(faraway)
data(prostate)
head(prostate)
## lcavol lweight age lbph svi lcp gleason pgg45 lpsa
## 1 -0.5798185 2.7695 50 -1.386294 0 -1.38629 6 0 -0.43078
## 2 -0.9942523 3.3196 58 -1.386294 0 -1.38629 6 0 -0.16252
## 3 -0.5108256 2.6912 74 -1.386294 0 -1.38629 7 20 -0.16252
## 4 -1.2039728 3.2828 58 -1.386294 0 -1.38629 6 0 -0.16252
## 5 0.7514161 3.4324 62 -1.386294 0 -1.38629 6 0 0.37156
## 6 -1.0498221 3.2288 50 -1.386294 0 -1.38629 6 0 0.76547
mlr = lm(prostate$lpsa ~ prostate$lcavol + prostate$lweight + prostate$age + prostate$lbph + prostate$svi + prostate$lcp + prostate$gleason + prostate$pgg45)
head(mlr)
## $coefficients
## (Intercept) prostate$lcavol prostate$lweight prostate$age
## 0.669336698 0.587021826 0.454467424 -0.019637176
## prostate$lbph prostate$svi prostate$lcp prostate$gleason
## 0.107054031 0.766157326 -0.105474263 0.045141598
## prostate$pgg45
## 0.004525231
##
## $residuals
## 1 2 3 4 5 6
## -1.305198480 -0.886561920 -0.706208030 -0.746727007 -1.349942595 -0.041782996
## 7 8 9 10 11 12
## -1.141337122 -1.273035810 -0.120476684 -0.259043503 -0.182056003 0.437325653
## 13 14 15 16 17 18
## -0.852569279 -0.571646890 -0.659150931 -0.490886516 0.173458070 -0.965559088
## 19 20 21 22 23 24
## 0.270181218 -0.135478348 -0.367958705 -1.054973183 0.638822834 -0.843041318
## 25 26 27 28 29 30
## -0.063871290 -0.286165662 -0.152952682 0.193749858 -0.234641701 -0.398687863
## 31 32 33 34 35 36
## -0.060148239 -0.867009935 0.051348218 0.653057393 0.952755370 -0.684507903
## 37 38 39 40 41 42
## 0.232269856 1.049580300 -1.733147829 0.414133656 0.248132796 0.094282397
## 43 44 45 46 47 48
## 0.276949085 -0.016717571 0.087342839 -0.032752717 -1.512237448 -0.007740796
## 49 50 51 52 53 54
## -0.116577443 0.456412900 0.326745017 -0.273088165 0.532058379 -0.297967802
## 55 56 57 58 59 60
## -0.591594097 -0.101459301 1.034653832 0.534542461 0.694812905 0.185891239
## 61 62 63 64 65 66
## 0.615313405 -0.676727354 -0.156643258 -0.811925558 0.337250761 0.223022915
## 67 68 69 70 71 72
## -0.047925122 -0.077957525 1.638119900 0.172245699 -0.271116586 0.918098326
## 73 74 75 76 77 78
## 0.289579209 -0.468883429 -0.371850841 -0.371293914 0.180030656 0.072686671
## 79 80 81 82 83 84
## -0.017000212 0.357079070 1.352281797 0.537624831 0.004406231 0.203862155
## 85 86 87 88 89 90
## 0.945675352 -0.279468843 0.672999128 0.675053596 -0.146303147 0.784033542
## 91 92 93 94 95 96
## 0.657246540 0.059300838 0.562172376 -0.020858043 1.525599382 1.289629827
## 97
## 1.491038202
##
## $effects
## (Intercept) prostate$lcavol prostate$lweight prostate$age
## -24.409281242 8.306793996 2.438954830 -0.648059451
## prostate$lbph prostate$svi prostate$lcp prostate$gleason
## 1.033943275 -2.439605727 -0.359541426 0.841139051
## prostate$pgg45
## -0.725088097 0.116330364 0.179616600 0.548550129
##
## -0.651455774 -0.301171074 -0.484356513 -0.050025477
##
## 0.097123635 -0.613594228 0.581579750 0.007804419
##
## 0.029456919 -1.151018667 0.930681266 -0.906903837
##
## 0.097910508 0.033404548 -0.061138061 0.107608067
##
## -0.255857555 -0.093157682 0.015801364 -0.847397500
##
## 0.305549993 1.005846716 1.290453081 -0.728148143
##
## 0.088004116 1.295281098 -1.794385031 0.417455975
##
## 0.050625440 0.325336238 0.370075280 0.099494981
##
## 0.053082823 0.066537636 -2.033310989 -0.102607306
##
## 0.277432003 0.643427771 0.484191190 -0.129291263
##
## 0.383402606 -0.458027757 -0.292159873 -0.159466391
##
## 1.168287654 0.677939112 0.730628600 0.163654192
##
## 0.692647916 -0.779814356 -0.436769409 -0.978396429
##
## 0.768358376 0.189014503 -0.228132106 -0.134985643
##
## 1.937994325 0.062271848 -0.160785981 0.933216400
##
## 0.475882553 -0.828531126 -0.201857645 -0.320167335
##
## 0.006777177 0.069859939 0.062707770 0.493163388
##
## 1.361505217 0.434601846 0.166449457 -0.261800413
##
## 0.820459300 -0.153893094 0.877655582 0.879287759
##
## -0.105251680 0.748276415 1.116725746 0.190445229
##
## 0.708180853 0.107721172 1.672800974 1.203790855
##
## 1.494601173
##
## $rank
## [1] 9
##
## $fitted.values
## 1 2 3 4 5 6 7 8
## 0.8744185 0.7240419 0.5436880 0.5842070 1.7215026 0.8072530 1.9068071 2.1274558
## 9 10 11 12 13 14 15 16
## 1.1677967 1.3063635 1.4490060 0.8296243 2.1195193 1.9197169 2.0578709 1.9378065
## 17 18 19 20 21 22 23 24
## 1.2967219 2.4584591 1.2879588 1.7348683 2.0069587 2.7132032 1.0567972 2.5568413
## 25 26 27 28 29 30 31 32
## 1.7955313 2.0526057 1.9530127 1.6227001 2.0830917 2.2933079 1.9843982 2.8752199
## 33 34 35 36 37 38 39 40
## 1.9568618 1.3684926 1.0949346 2.7701779 1.9252901 1.1420697 3.9468978 1.8631363
## 41 42 43 44 45 46 47 48
## 2.0494372 2.2132876 2.0503309 2.3916276 2.4343772 2.5860927 4.0810274 2.5765308
## 49 50 51 52 53 54 55 56
## 2.7080974 2.1351071 2.3300150 2.9506782 2.1523816 2.9892078 3.2963041 2.8194593
## 57 58 59 60 61 62 63 64
## 1.7534362 2.2596875 2.1115771 2.6265188 2.2266866 3.5303174 3.0102333 3.6939256
## 65 66 67 68 69 70 71 72
## 2.5447492 2.6645671 2.9683951 3.0406475 1.3245701 2.8007343 3.2841966 2.1192517
## 73 74 75 76 77 78 79 80
## 2.7667808 3.5438934 3.6471108 3.7088439 3.2127993 3.3629133 3.4748902 3.1559609
## 81 82 83 84 85 86 87 88
## 2.1637282 2.9931352 3.5608938 3.3670778 2.6420046 3.9104588 3.0070909 3.0372964
## 89 90 91 92 93 94 95 96
## 4.1306431 3.2095665 3.3725635 4.0702492 3.8229776 4.7052980 3.6175206 4.1878802
## 97
## 4.0918918
##
## $assign
## [1] 0 1 2 3 4 5 6 7 8
confint(mlr, level = 0.90)
## 5 % 95 %
## (Intercept) -1.485718237 2.824391633
## prostate$lcavol 0.440867156 0.733176497
## prostate$lweight 0.171846568 0.737088281
## prostate$age -0.038210200 -0.001064151
## prostate$lbph 0.009890745 0.204217317
## prostate$svi 0.360029029 1.172285623
## prostate$lcp -0.256770899 0.045822373
## prostate$gleason -0.216620186 0.306903382
## prostate$pgg45 -0.002824333 0.011874796
head(mlr)
## $coefficients
## (Intercept) prostate$lcavol prostate$lweight prostate$age
## 0.669336698 0.587021826 0.454467424 -0.019637176
## prostate$lbph prostate$svi prostate$lcp prostate$gleason
## 0.107054031 0.766157326 -0.105474263 0.045141598
## prostate$pgg45
## 0.004525231
##
## $residuals
## 1 2 3 4 5 6
## -1.305198480 -0.886561920 -0.706208030 -0.746727007 -1.349942595 -0.041782996
## 7 8 9 10 11 12
## -1.141337122 -1.273035810 -0.120476684 -0.259043503 -0.182056003 0.437325653
## 13 14 15 16 17 18
## -0.852569279 -0.571646890 -0.659150931 -0.490886516 0.173458070 -0.965559088
## 19 20 21 22 23 24
## 0.270181218 -0.135478348 -0.367958705 -1.054973183 0.638822834 -0.843041318
## 25 26 27 28 29 30
## -0.063871290 -0.286165662 -0.152952682 0.193749858 -0.234641701 -0.398687863
## 31 32 33 34 35 36
## -0.060148239 -0.867009935 0.051348218 0.653057393 0.952755370 -0.684507903
## 37 38 39 40 41 42
## 0.232269856 1.049580300 -1.733147829 0.414133656 0.248132796 0.094282397
## 43 44 45 46 47 48
## 0.276949085 -0.016717571 0.087342839 -0.032752717 -1.512237448 -0.007740796
## 49 50 51 52 53 54
## -0.116577443 0.456412900 0.326745017 -0.273088165 0.532058379 -0.297967802
## 55 56 57 58 59 60
## -0.591594097 -0.101459301 1.034653832 0.534542461 0.694812905 0.185891239
## 61 62 63 64 65 66
## 0.615313405 -0.676727354 -0.156643258 -0.811925558 0.337250761 0.223022915
## 67 68 69 70 71 72
## -0.047925122 -0.077957525 1.638119900 0.172245699 -0.271116586 0.918098326
## 73 74 75 76 77 78
## 0.289579209 -0.468883429 -0.371850841 -0.371293914 0.180030656 0.072686671
## 79 80 81 82 83 84
## -0.017000212 0.357079070 1.352281797 0.537624831 0.004406231 0.203862155
## 85 86 87 88 89 90
## 0.945675352 -0.279468843 0.672999128 0.675053596 -0.146303147 0.784033542
## 91 92 93 94 95 96
## 0.657246540 0.059300838 0.562172376 -0.020858043 1.525599382 1.289629827
## 97
## 1.491038202
##
## $effects
## (Intercept) prostate$lcavol prostate$lweight prostate$age
## -24.409281242 8.306793996 2.438954830 -0.648059451
## prostate$lbph prostate$svi prostate$lcp prostate$gleason
## 1.033943275 -2.439605727 -0.359541426 0.841139051
## prostate$pgg45
## -0.725088097 0.116330364 0.179616600 0.548550129
##
## -0.651455774 -0.301171074 -0.484356513 -0.050025477
##
## 0.097123635 -0.613594228 0.581579750 0.007804419
##
## 0.029456919 -1.151018667 0.930681266 -0.906903837
##
## 0.097910508 0.033404548 -0.061138061 0.107608067
##
## -0.255857555 -0.093157682 0.015801364 -0.847397500
##
## 0.305549993 1.005846716 1.290453081 -0.728148143
##
## 0.088004116 1.295281098 -1.794385031 0.417455975
##
## 0.050625440 0.325336238 0.370075280 0.099494981
##
## 0.053082823 0.066537636 -2.033310989 -0.102607306
##
## 0.277432003 0.643427771 0.484191190 -0.129291263
##
## 0.383402606 -0.458027757 -0.292159873 -0.159466391
##
## 1.168287654 0.677939112 0.730628600 0.163654192
##
## 0.692647916 -0.779814356 -0.436769409 -0.978396429
##
## 0.768358376 0.189014503 -0.228132106 -0.134985643
##
## 1.937994325 0.062271848 -0.160785981 0.933216400
##
## 0.475882553 -0.828531126 -0.201857645 -0.320167335
##
## 0.006777177 0.069859939 0.062707770 0.493163388
##
## 1.361505217 0.434601846 0.166449457 -0.261800413
##
## 0.820459300 -0.153893094 0.877655582 0.879287759
##
## -0.105251680 0.748276415 1.116725746 0.190445229
##
## 0.708180853 0.107721172 1.672800974 1.203790855
##
## 1.494601173
##
## $rank
## [1] 9
##
## $fitted.values
## 1 2 3 4 5 6 7 8
## 0.8744185 0.7240419 0.5436880 0.5842070 1.7215026 0.8072530 1.9068071 2.1274558
## 9 10 11 12 13 14 15 16
## 1.1677967 1.3063635 1.4490060 0.8296243 2.1195193 1.9197169 2.0578709 1.9378065
## 17 18 19 20 21 22 23 24
## 1.2967219 2.4584591 1.2879588 1.7348683 2.0069587 2.7132032 1.0567972 2.5568413
## 25 26 27 28 29 30 31 32
## 1.7955313 2.0526057 1.9530127 1.6227001 2.0830917 2.2933079 1.9843982 2.8752199
## 33 34 35 36 37 38 39 40
## 1.9568618 1.3684926 1.0949346 2.7701779 1.9252901 1.1420697 3.9468978 1.8631363
## 41 42 43 44 45 46 47 48
## 2.0494372 2.2132876 2.0503309 2.3916276 2.4343772 2.5860927 4.0810274 2.5765308
## 49 50 51 52 53 54 55 56
## 2.7080974 2.1351071 2.3300150 2.9506782 2.1523816 2.9892078 3.2963041 2.8194593
## 57 58 59 60 61 62 63 64
## 1.7534362 2.2596875 2.1115771 2.6265188 2.2266866 3.5303174 3.0102333 3.6939256
## 65 66 67 68 69 70 71 72
## 2.5447492 2.6645671 2.9683951 3.0406475 1.3245701 2.8007343 3.2841966 2.1192517
## 73 74 75 76 77 78 79 80
## 2.7667808 3.5438934 3.6471108 3.7088439 3.2127993 3.3629133 3.4748902 3.1559609
## 81 82 83 84 85 86 87 88
## 2.1637282 2.9931352 3.5608938 3.3670778 2.6420046 3.9104588 3.0070909 3.0372964
## 89 90 91 92 93 94 95 96
## 4.1306431 3.2095665 3.3725635 4.0702492 3.8229776 4.7052980 3.6175206 4.1878802
## 97
## 4.0918918
##
## $assign
## [1] 0 1 2 3 4 5 6 7 8
confint(mlr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) -1.906960983 3.245634379
## prostate$lcavol 0.412298699 0.761744954
## prostate$lweight 0.116603435 0.792331414
## prostate$age -0.041840618 0.002566267
## prostate$lbph -0.009101499 0.223209561
## prostate$svi 0.280644232 1.251670420
## prostate$lcp -0.286344443 0.075395916
## prostate$gleason -0.267786053 0.358069248
## prostate$pgg45 -0.004260932 0.013311395
head(mlr)
## $coefficients
## (Intercept) prostate$lcavol prostate$lweight prostate$age
## 0.669336698 0.587021826 0.454467424 -0.019637176
## prostate$lbph prostate$svi prostate$lcp prostate$gleason
## 0.107054031 0.766157326 -0.105474263 0.045141598
## prostate$pgg45
## 0.004525231
##
## $residuals
## 1 2 3 4 5 6
## -1.305198480 -0.886561920 -0.706208030 -0.746727007 -1.349942595 -0.041782996
## 7 8 9 10 11 12
## -1.141337122 -1.273035810 -0.120476684 -0.259043503 -0.182056003 0.437325653
## 13 14 15 16 17 18
## -0.852569279 -0.571646890 -0.659150931 -0.490886516 0.173458070 -0.965559088
## 19 20 21 22 23 24
## 0.270181218 -0.135478348 -0.367958705 -1.054973183 0.638822834 -0.843041318
## 25 26 27 28 29 30
## -0.063871290 -0.286165662 -0.152952682 0.193749858 -0.234641701 -0.398687863
## 31 32 33 34 35 36
## -0.060148239 -0.867009935 0.051348218 0.653057393 0.952755370 -0.684507903
## 37 38 39 40 41 42
## 0.232269856 1.049580300 -1.733147829 0.414133656 0.248132796 0.094282397
## 43 44 45 46 47 48
## 0.276949085 -0.016717571 0.087342839 -0.032752717 -1.512237448 -0.007740796
## 49 50 51 52 53 54
## -0.116577443 0.456412900 0.326745017 -0.273088165 0.532058379 -0.297967802
## 55 56 57 58 59 60
## -0.591594097 -0.101459301 1.034653832 0.534542461 0.694812905 0.185891239
## 61 62 63 64 65 66
## 0.615313405 -0.676727354 -0.156643258 -0.811925558 0.337250761 0.223022915
## 67 68 69 70 71 72
## -0.047925122 -0.077957525 1.638119900 0.172245699 -0.271116586 0.918098326
## 73 74 75 76 77 78
## 0.289579209 -0.468883429 -0.371850841 -0.371293914 0.180030656 0.072686671
## 79 80 81 82 83 84
## -0.017000212 0.357079070 1.352281797 0.537624831 0.004406231 0.203862155
## 85 86 87 88 89 90
## 0.945675352 -0.279468843 0.672999128 0.675053596 -0.146303147 0.784033542
## 91 92 93 94 95 96
## 0.657246540 0.059300838 0.562172376 -0.020858043 1.525599382 1.289629827
## 97
## 1.491038202
##
## $effects
## (Intercept) prostate$lcavol prostate$lweight prostate$age
## -24.409281242 8.306793996 2.438954830 -0.648059451
## prostate$lbph prostate$svi prostate$lcp prostate$gleason
## 1.033943275 -2.439605727 -0.359541426 0.841139051
## prostate$pgg45
## -0.725088097 0.116330364 0.179616600 0.548550129
##
## -0.651455774 -0.301171074 -0.484356513 -0.050025477
##
## 0.097123635 -0.613594228 0.581579750 0.007804419
##
## 0.029456919 -1.151018667 0.930681266 -0.906903837
##
## 0.097910508 0.033404548 -0.061138061 0.107608067
##
## -0.255857555 -0.093157682 0.015801364 -0.847397500
##
## 0.305549993 1.005846716 1.290453081 -0.728148143
##
## 0.088004116 1.295281098 -1.794385031 0.417455975
##
## 0.050625440 0.325336238 0.370075280 0.099494981
##
## 0.053082823 0.066537636 -2.033310989 -0.102607306
##
## 0.277432003 0.643427771 0.484191190 -0.129291263
##
## 0.383402606 -0.458027757 -0.292159873 -0.159466391
##
## 1.168287654 0.677939112 0.730628600 0.163654192
##
## 0.692647916 -0.779814356 -0.436769409 -0.978396429
##
## 0.768358376 0.189014503 -0.228132106 -0.134985643
##
## 1.937994325 0.062271848 -0.160785981 0.933216400
##
## 0.475882553 -0.828531126 -0.201857645 -0.320167335
##
## 0.006777177 0.069859939 0.062707770 0.493163388
##
## 1.361505217 0.434601846 0.166449457 -0.261800413
##
## 0.820459300 -0.153893094 0.877655582 0.879287759
##
## -0.105251680 0.748276415 1.116725746 0.190445229
##
## 0.708180853 0.107721172 1.672800974 1.203790855
##
## 1.494601173
##
## $rank
## [1] 9
##
## $fitted.values
## 1 2 3 4 5 6 7 8
## 0.8744185 0.7240419 0.5436880 0.5842070 1.7215026 0.8072530 1.9068071 2.1274558
## 9 10 11 12 13 14 15 16
## 1.1677967 1.3063635 1.4490060 0.8296243 2.1195193 1.9197169 2.0578709 1.9378065
## 17 18 19 20 21 22 23 24
## 1.2967219 2.4584591 1.2879588 1.7348683 2.0069587 2.7132032 1.0567972 2.5568413
## 25 26 27 28 29 30 31 32
## 1.7955313 2.0526057 1.9530127 1.6227001 2.0830917 2.2933079 1.9843982 2.8752199
## 33 34 35 36 37 38 39 40
## 1.9568618 1.3684926 1.0949346 2.7701779 1.9252901 1.1420697 3.9468978 1.8631363
## 41 42 43 44 45 46 47 48
## 2.0494372 2.2132876 2.0503309 2.3916276 2.4343772 2.5860927 4.0810274 2.5765308
## 49 50 51 52 53 54 55 56
## 2.7080974 2.1351071 2.3300150 2.9506782 2.1523816 2.9892078 3.2963041 2.8194593
## 57 58 59 60 61 62 63 64
## 1.7534362 2.2596875 2.1115771 2.6265188 2.2266866 3.5303174 3.0102333 3.6939256
## 65 66 67 68 69 70 71 72
## 2.5447492 2.6645671 2.9683951 3.0406475 1.3245701 2.8007343 3.2841966 2.1192517
## 73 74 75 76 77 78 79 80
## 2.7667808 3.5438934 3.6471108 3.7088439 3.2127993 3.3629133 3.4748902 3.1559609
## 81 82 83 84 85 86 87 88
## 2.1637282 2.9931352 3.5608938 3.3670778 2.6420046 3.9104588 3.0070909 3.0372964
## 89 90 91 92 93 94 95 96
## 4.1306431 3.2095665 3.3725635 4.0702492 3.8229776 4.7052980 3.6175206 4.1878802
## 97
## 4.0918918
##
## $assign
## [1] 0 1 2 3 4 5 6 7 8
summary(mlr)
##
## Call:
## lm(formula = prostate$lpsa ~ prostate$lcavol + prostate$lweight +
## prostate$age + prostate$lbph + prostate$svi + prostate$lcp +
## prostate$gleason + prostate$pgg45)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7331 -0.3713 -0.0170 0.4141 1.6381
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.669337 1.296387 0.516 0.60693
## prostate$lcavol 0.587022 0.087920 6.677 2.11e-09 ***
## prostate$lweight 0.454467 0.170012 2.673 0.00896 **
## prostate$age -0.019637 0.011173 -1.758 0.08229 .
## prostate$lbph 0.107054 0.058449 1.832 0.07040 .
## prostate$svi 0.766157 0.244309 3.136 0.00233 **
## prostate$lcp -0.105474 0.091013 -1.159 0.24964
## prostate$gleason 0.045142 0.157465 0.287 0.77503
## prostate$pgg45 0.004525 0.004421 1.024 0.30886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7084 on 88 degrees of freedom
## Multiple R-squared: 0.6548, Adjusted R-squared: 0.6234
## F-statistic: 20.86 on 8 and 88 DF, p-value: < 2.2e-16
The ellipse package and corresponding function should help here. Use the code below (and help files) as a guide.
mlr2 = lm(prostate$lpsa + prostate$lcavol + prostate$lweight + prostate$svi + prostate$lcp + prostate$gleason + prostate$pgg45 ~ prostate$age + prostate$lbph)
confint(mlr2, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) -94.3109012 17.508492
## prostate$age 0.3342793 2.079706
## prostate$lbph -4.8005990 4.156423
summary(mlr2)
##
## Call:
## lm(formula = prostate$lpsa + prostate$lcavol + prostate$lweight +
## prostate$svi + prostate$lcp + prostate$gleason + prostate$pgg45 ~
## prostate$age + prostate$lbph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.52 -25.20 -10.35 19.23 75.45
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -38.4012 28.1587 -1.364 0.17591
## prostate$age 1.2070 0.4395 2.746 0.00723 **
## prostate$lbph -0.3221 2.2556 -0.143 0.88676
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30.03 on 94 degrees of freedom
## Multiple R-squared: 0.08118, Adjusted R-squared: 0.06163
## F-statistic: 4.152 on 2 and 94 DF, p-value: 0.0187
print("H0: The regression coefficient is zero | H1: The regression coefficient is not zero")
## [1] "H0: The regression coefficient is zero | H1: The regression coefficient is not zero"
print("P-value of age = 0.00723 which is < alpha 0.05 so we reject H0")
## [1] "P-value of age = 0.00723 which is < alpha 0.05 so we reject H0"
print("P-value of lbph = 0.88676 which is > alpha 0.05 so we fail to reject H0")
## [1] "P-value of lbph = 0.88676 which is > alpha 0.05 so we fail to reject H0"
plot(mlr2)
mlr3 = lm(prostate$age ~ prostate$lpsa + prostate$lcavol + prostate$lweight + prostate$lbph + prostate$svi + prostate$lcp + prostate$gleason + prostate$pgg45)
summary(mlr3)
##
## Call:
## lm(formula = prostate$age ~ prostate$lpsa + prostate$lcavol +
## prostate$lweight + prostate$lbph + prostate$svi + prostate$lcp +
## prostate$gleason + prostate$pgg45)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.6192 -4.1897 0.1752 4.8269 13.4275
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.81999 11.33037 3.691 0.000387 ***
## prostate$lpsa -1.72701 0.98259 -1.758 0.082293 .
## prostate$lcavol 2.04919 0.98817 2.074 0.041028 *
## prostate$lweight 3.31716 1.61968 2.048 0.043538 *
## prostate$lbph 1.40722 0.53796 2.616 0.010473 *
## prostate$svi 2.18334 2.40451 0.908 0.366349
## prostate$lcp -1.35386 0.84781 -1.597 0.113875
## prostate$gleason 1.35632 1.47029 0.922 0.358799
## prostate$pgg45 0.05856 0.04124 1.420 0.159116
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.643 on 88 degrees of freedom
## Multiple R-squared: 0.2701, Adjusted R-squared: 0.2038
## F-statistic: 4.071 on 8 and 88 DF, p-value: 0.0003699
plot(mlr3)
mlr4 = lm(prostate$age ~ prostate$lpsa + prostate$svi + prostate$lcp + prostate$gleason + prostate$pgg45)
summary(mlr4)
##
## Call:
## lm(formula = prostate$age ~ prostate$lpsa + prostate$svi + prostate$lcp +
## prostate$gleason + prostate$pgg45)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.925 -3.240 1.281 4.640 13.109
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.75427 10.09612 5.027 2.48e-06 ***
## prostate$lpsa 0.62544 0.81920 0.763 0.447
## prostate$svi 0.31421 2.57705 0.122 0.903
## prostate$lcp -0.76629 0.84435 -0.908 0.367
## prostate$gleason 1.48160 1.57838 0.939 0.350
## prostate$pgg45 0.05543 0.04451 1.245 0.216
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.268 on 91 degrees of freedom
## Multiple R-squared: 0.09675, Adjusted R-squared: 0.04712
## F-statistic: 1.95 on 5 and 91 DF, p-value: 0.09377
plot(mlr4)
print("First model is preferred.")
## [1] "First model is preferred."
This link contains advertising data. This dataset contains, in thousands of dollars, TV, Radio, and Newspaper budgets for 200 different markets along with the Sales, in thousands of units, for each market.
require(tidyr)
## Loading required package: tidyr
rawData <- read.csv(url("https://www.colorado.edu/amath/sites/default/files/attached-files/advertising.txt"))
df = separate(data = rawData, col = 1, into = c("Market", "TV", "Radio", "Newspaper", "Sales"))
## Warning: Expected 5 pieces. Additional pieces discarded in 200 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
print(df)
## Market TV Radio Newspaper Sales
## 1 1 230 1 37 8
## 2 2 44 5 39 3
## 3 3 17 2 45 9
## 4 4 151 5 41 3
## 5 5 180 8 10 8
## 6 6 8 7 48 9
## 7 7 57 5 32 8
## 8 8 120 2 19 6
## 9 9 8 6 2 1
## 10 10 199 8 2 6
## 11 11 66 1 5 8
## 12 12 214 7 24 4
## 13 13 23 8 35 1
## 14 14 97 5 7 6
## 15 15 204 1 32 9
## 16 16 195 4 47 7
## 17 17 67 8 36 6
## 18 18 281 4 39 6
## 19 19 69 2 20 5
## 20 20 147 3 23 9
## 21 21 218 4 27 7
## 22 22 237 4 5 1
## 23 23 13 2 15 9
## 24 24 228 3 16 9
## 25 25 62 3 12 6
## 26 26 262 9 3 5
## 27 27 142 9 29 3
## 28 28 240 1 16 7
## 29 29 248 8 27 1
## 30 30 70 6 16 40
## 31 31 292 9 28 3
## 32 32 112 9 17 4
## 33 33 97 2 1 5
## 34 34 265 6 20 0
## 35 35 95 7 1 4
## 36 36 290 7 4 1
## 37 37 266 9 43 8
## 38 38 74 7 49 4
## 39 39 43 1 26 7
## 40 40 228 37 7 32
## 41 41 202 5 22 3
## 42 42 177 33 4 38
## 43 43 293 6 27 7
## 44 44 206 9 8 4
## 45 45 25 1 25 7
## 46 46 175 1 22 5
## 47 47 89 7 9 9
## 48 48 239 9 41 5
## 49 49 227 2 15 8
## 50 50 66 9 11 7
## 51 51 199 8 3 1
## 52 52 100 4 9 6
## 53 53 216 4 41 7
## 54 54 182 6 46 2
## 55 55 262 7 28 8
## 56 56 198 9 49 4
## 57 57 7 3 28 1
## 58 58 136 2 19 2
## 59 59 210 8 49 6
## 60 60 210 7 29 5
## 61 61 53 5 2 21
## 62 62 261 3 42 7
## 63 63 239 3 15 5
## 64 64 102 7 29 6
## 65 65 131 1 42 8
## 66 66 69 9 3 0
## 67 67 31 5 24 6
## 68 68 139 3 14 5
## 69 69 237 4 27 5
## 70 70 216 8 43 9
## 71 71 199 1 30 6
## 72 72 109 8 14 3
## 73 73 26 8 33 19
## 74 74 129 4 5 7
## 75 75 213 4 24 6
## 76 76 16 9 43 7
## 77 77 27 5 1 6
## 78 78 120 5 28 5
## 79 79 5 4 29 9
## 80 80 116 7 7 23
## 81 81 76 4 26 7
## 82 82 239 8 4 1
## 83 83 75 3 20 3
## 84 84 68 4 44 5
## 85 85 213 5 43 33
## 86 86 193 2 18 4
## 87 87 76 3 27 5
## 88 88 110 7 40 6
## 89 89 88 3 25 5
## 90 90 109 8 47 8
## 91 91 134 3 4 9
## 92 92 28 6 1 5
## 93 93 217 7 33 5
## 94 94 250 9 36 5
## 95 95 107 4 14 10
## 96 96 163 3 31 6
## 97 97 197 6 3 5
## 98 98 184 9 21 22
## 99 99 289 7 42 3
## 100 100 135 2 41 7
## 101 101 222 4 4 3
## 102 102 296 4 36 3
## 103 103 280 2 10 1
## 104 104 187 9 17 2
## 105 105 238 2 34 3
## 106 106 137 9 46 4
## 107 107 25 11 29 7
## 108 108 90 4 0 3
## 109 109 13 1 0 4
## 110 110 255 4 26 9
## 111 111 225 8 8 2
## 112 112 241 7 38 23
## 113 113 175 7 15 4
## 114 114 209 6 20 6
## 115 115 78 2 46 8
## 116 116 75 1 35 52
## 117 117 139 2 14 3
## 118 118 76 4 0 8
## 119 119 125 7 36 9
## 120 120 19 4 16 22
## 121 121 141 3 26 8
## 122 122 18 8 21 7
## 123 123 224 2 4 15
## 124 124 123 1 34 6
## 125 125 229 5 32 3
## 126 126 87 2 11 8
## 127 127 7 8 38 9
## 128 128 80 2 0 9
## 129 129 220 3 49 3
## 130 130 59 6 12 43
## 131 131 0 7 39 6
## 132 132 265 2 2 9
## 133 133 8 4 27 2
## 134 134 219 8 33 5
## 135 135 36 9 38 6
## 136 136 48 3 47 8
## 137 137 25 6 39 9
## 138 138 273 7 28 9
## 139 139 43 25 9 20
## 140 140 184 9 43 9
## 141 141 73 4 17 12
## 142 142 193 7 35 4
## 143 143 220 5 33 2
## 144 144 104 6 5 7
## 145 145 96 2 14 8
## 146 146 140 3 1 9
## 147 147 240 1 7 3
## 148 148 243 2 49 44
## 149 149 38 40 3 11
## 150 150 44 7 25 8
## 151 151 280 7 13 9
## 152 152 121 8 4 48
## 153 153 197 6 23 3
## 154 154 171 3 39 7
## 155 155 187 8 21 1
## 156 156 4 1 11 6
## 157 157 93 9 43 5
## 158 158 149 8 1 3
## 159 159 11 7 36 9
## 160 160 131 7 18 4
## 161 161 172 5 18 1
## 162 162 85 7 35 8
## 163 163 188 4 18 1
## 164 164 163 5 36 8
## 165 165 117 2 14 7
## 166 166 234 5 3 4
## 167 167 17 9 37 6
## 168 168 206 8 5 2
## 169 169 215 4 23 6
## 170 170 284 3 10 6
## 171 171 50 11 6 18
## 172 172 164 5 20 9
## 173 173 19 6 20 1
## 174 174 168 4 7 1
## 175 175 222 4 3 4
## 176 176 276 9 48 9
## 177 177 248 4 30 2
## 178 178 170 2 7 8
## 179 179 276 7 2 3
## 180 180 165 6 10 17
## 181 181 156 6 2 6
## 182 182 218 5 5 4
## 183 183 56 2 5 7
## 184 184 287 6 43 71
## 185 185 253 8 21 3
## 186 186 205 45 1 19
## 187 187 139 5 2 1
## 188 188 191 1 28 7
## 189 189 286 13 9 3
## 190 190 18 7 12 1
## 191 191 39 5 41 1
## 192 192 75 5 10 8
## 193 193 17 2 4 1
## 194 194 166 8 42 3
## 195 195 149 7 35 6
## 196 196 38 2 3 7
## 197 197 94 2 4 9
## 198 198 177 9 3 6
## 199 199 283 6 42 66
## 200 200 232 1 8 6
smp_size <- floor(0.80 * nrow(df))
set.seed(1111)
train_ind <- sample(seq_len(nrow(df)), size = smp_size)
train <- df[train_ind, ]
test <- df[-train_ind, ]
head(train, 10)
## Market TV Radio Newspaper Sales
## 44 44 206 9 8 4
## 182 182 218 5 5 4
## 50 50 66 9 11 7
## 26 26 262 9 3 5
## 36 36 290 7 4 1
## 70 70 216 8 43 9
## 111 111 225 8 8 2
## 188 188 191 1 28 7
## 63 63 239 3 15 5
## 39 39 43 1 26 7
head(test, 10)
## Market TV Radio Newspaper Sales
## 3 3 17 2 45 9
## 6 6 8 7 48 9
## 20 20 147 3 23 9
## 28 28 240 1 16 7
## 29 29 248 8 27 1
## 35 35 95 7 1 4
## 48 48 239 9 41 5
## 54 54 182 6 46 2
## 55 55 262 7 28 8
## 66 66 69 9 3 0
market <- as.numeric(df$Market)
tv <- as.numeric(df$TV)
radio <- as.numeric(df$Radio)
newspaper <- as.numeric(df$Newspaper)
sales <- as.numeric(df$Sales)
model <- cbind.data.frame(market, tv, radio, newspaper, sales)
mlr5 = lm(model$sales ~ model$tv + model$market + model$radio + model$newspaper)
summary(mlr5)
##
## Call:
## lm(formula = model$sales ~ model$tv + model$market + model$radio +
## model$newspaper)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.081 -4.880 -1.820 0.854 59.297
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.2759488 2.3017848 0.989 0.32400
## model$tv 0.0009845 0.0082372 0.120 0.90499
## model$market 0.0201500 0.0123029 1.638 0.10307
## model$radio 0.3680533 0.1271022 2.896 0.00421 **
## model$newspaper 0.0750768 0.0477328 1.573 0.11737
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.951 on 195 degrees of freedom
## Multiple R-squared: 0.06112, Adjusted R-squared: 0.04186
## F-statistic: 3.173 on 4 and 195 DF, p-value: 0.01487
print("Radio is the only significant variable at the 5% level. Statistaclly radio has the only p-value below alpha 0.05, which means its the only variable that is of segnificance for our question. Practically, the sales resolts could be dependant on unknown variables. The etimator for the newspaper is n ot negative.")
## [1] "Radio is the only significant variable at the 5% level. Statistaclly radio has the only p-value below alpha 0.05, which means its the only variable that is of segnificance for our question. Practically, the sales resolts could be dependant on unknown variables. The etimator for the newspaper is n ot negative."
testMarket <- as.numeric(test$Market)
testTV <- as.numeric(test$TV)
testRadio <- as.numeric(test$Radio)
testNewspaper <- as.numeric(test$Newspaper)
testSales <- as.numeric(test$Sales)
testModel <- cbind.data.frame(testMarket, testTV, testRadio, testNewspaper, testSales)
testMLR = lm(testModel$testSales ~ testModel$testTV + testModel$testMarket + testModel$testRadio + testModel$testNewspaper)
summary(testMLR)
##
## Call:
## lm(formula = testModel$testSales ~ testModel$testTV + testModel$testMarket +
## testModel$testRadio + testModel$testNewspaper)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.709 -5.188 -1.463 3.096 47.027
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.03854 5.92997 -0.850 0.40128
## testModel$testTV 0.02983 0.01887 1.581 0.12281
## testModel$testMarket 0.05906 0.03026 1.952 0.05900 .
## testModel$testRadio -1.12948 0.66640 -1.695 0.09898 .
## testModel$testNewspaper 0.38044 0.11330 3.358 0.00191 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.61 on 35 degrees of freedom
## Multiple R-squared: 0.3239, Adjusted R-squared: 0.2467
## F-statistic: 4.192 on 4 and 35 DF, p-value: 0.007062
plot(testMLR)
testSample <- length(testSales)
print(testSample)
## [1] 40
testMean <- mean(testSales)
print(testMean)
## [1] 8.075
testSD <- sd(testSales, na.rm = TRUE)
testSE <- (testSD / testSample)
print(testSE)
## [1] 0.3055778
testAlpha = 0.05
testDF = (testSample - 1)
testCV = qt(testAlpha / 2, testDF, lower.tail = F)
print(testCV)
## [1] 2.022691
testME = (testCV * testSE)
testLB = testMean - testME
testUB = testMean + testME
testCI = c(testLB, testUB)
print(testCI)
## [1] 7.456911 8.693089
testMarket <- as.numeric(test$Market)
testTV <- as.numeric(test$TV)
testRadio <- as.numeric(test$Radio)
testNewspaper <- as.numeric(test$Newspaper)
testSales <- as.numeric(test$Sales)
testModel <- cbind.data.frame(testMarket, testTV, testRadio, testNewspaper, testSales)
testMLR = lm(testModel$testSales ~ testModel$testTV^2 + testModel$testMarket^2 + testModel$testRadio^2 + testModel$testNewspaper^2)
summary(testMLR)
##
## Call:
## lm(formula = testModel$testSales ~ testModel$testTV^2 + testModel$testMarket^2 +
## testModel$testRadio^2 + testModel$testNewspaper^2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.709 -5.188 -1.463 3.096 47.027
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.03854 5.92997 -0.850 0.40128
## testModel$testTV 0.02983 0.01887 1.581 0.12281
## testModel$testMarket 0.05906 0.03026 1.952 0.05900 .
## testModel$testRadio -1.12948 0.66640 -1.695 0.09898 .
## testModel$testNewspaper 0.38044 0.11330 3.358 0.00191 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.61 on 35 degrees of freedom
## Multiple R-squared: 0.3239, Adjusted R-squared: 0.2467
## F-statistic: 4.192 on 4 and 35 DF, p-value: 0.007062
plot(testMLR)
testSample <- length(testSales)
print(testSample)
## [1] 40
testMean <- mean(testSales)
print(testMean)
## [1] 8.075
testSD <- sd(testSales, na.rm = TRUE)
testSE <- (testSD / testSample)
print(testSE)
## [1] 0.3055778
testAlpha = 0.05
testDF = (testSample - 1)
testCV = qt(testAlpha / 2, testDF, lower.tail = F)
print(testCV)
## [1] 2.022691
testME = (testCV * testSE)
testLB = testMean - testME
testUB = testMean + testME
testCI = c(testLB, testUB)
print(testCI)
## [1] 7.456911 8.693089
print("No change to original model observed.")
## [1] "No change to original model observed."
As per the above, no such observation was present!
#### It seems I have an error in steps c & d, since difference in observation exists to run test on.
library(faraway)
data(teengamb)
head(teengamb)
## sex status income verbal gamble
## 1 1 51 2.00 8 0.0
## 2 1 28 2.50 8 0.0
## 3 1 37 2.00 6 0.0
## 4 1 28 7.00 4 7.3
## 5 1 65 2.00 8 19.6
## 6 1 61 3.47 6 0.1
mlrModel = lm(teengamb$gamble ~ teengamb$sex + teengamb$status + teengamb$income + teengamb$verbal)
summary(mlrModel)
##
## Call:
## lm(formula = teengamb$gamble ~ teengamb$sex + teengamb$status +
## teengamb$income + teengamb$verbal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.082 -11.320 -1.451 9.452 94.252
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.55565 17.19680 1.312 0.1968
## teengamb$sex -22.11833 8.21111 -2.694 0.0101 *
## teengamb$status 0.05223 0.28111 0.186 0.8535
## teengamb$income 4.96198 1.02539 4.839 1.79e-05 ***
## teengamb$verbal -2.95949 2.17215 -1.362 0.1803
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.69 on 42 degrees of freedom
## Multiple R-squared: 0.5267, Adjusted R-squared: 0.4816
## F-statistic: 11.69 on 4 and 42 DF, p-value: 1.815e-06
plot(mlrModel)
### Points 24 and 39 seem to exhibit a constant variance violation. From the Cook's distance diagram, it seems we should test the model withpout point 24 to gage its influence. Point 39 seems to be a leverage point. Aside from those two points, the model seems to have no violations or unusual observations.